/* 
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;

using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput;
using Directory = Lucene.Net.Store.Directory;
using IndexInput = Lucene.Net.Store.IndexInput;

namespace Lucene.Net.Index
{
    /// <summary>
    /// Reads per-document term vectors for a single segment from the three
    /// term-vector files: the index stream (.tvx), the documents stream (.tvd)
    /// and the fields stream (.tvf).  Several historical on-disk formats are
    /// supported (see the FORMAT_* constants).  Instances are cloneable so that
    /// each consumer can hold private, independently positioned file handles.
    /// </summary>
    class TermVectorsReader : System.ICloneable, IDisposable
    {
        
        // NOTE: if you make a new format, it must be larger than
        // the current format
        internal const int FORMAT_VERSION = 2;
        
        // Changes to speed up bulk merging of term vectors:
        internal const int FORMAT_VERSION2 = 3;
        
        // Changed strings to UTF8 with length-in-bytes not length-in-chars
        internal const int FORMAT_UTF8_LENGTH_IN_BYTES = 4;
        
        // NOTE: always change this if you switch to a new format!
        internal static readonly int FORMAT_CURRENT = FORMAT_UTF8_LENGTH_IN_BYTES;
        
        //The size in bytes that the FORMAT_VERSION will take up at the beginning of each file 
        internal const int FORMAT_SIZE = 4;
        
        // Per-term bit flags stored in the tvf stream (format >= FORMAT_VERSION).
        internal const byte STORE_POSITIONS_WITH_TERMVECTOR = (byte) (0x1);
        internal const byte STORE_OFFSET_WITH_TERMVECTOR = (byte) (0x2);
        
        private FieldInfos fieldInfos;
        
        private IndexInput tvx;    // index stream; null when the segment has no term vector files
        private IndexInput tvd;    // documents stream
        private IndexInput tvf;    // fields stream
        private int size;          // number of documents exposed by this reader
        private int numTotalDocs;  // total documents recorded in the tvx file
        
        // The docID offset where our docs begin in the index
        // file.  This will be 0 if we have our own private file.
        private int docStoreOffset;
        
        private int format;        // format version read from the file headers; 0 when no files exist
        private bool isDisposed;

        /// <summary>Opens the reader with the default buffer size and no doc-store offset.</summary>
        internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos):this(d, segment, fieldInfos, BufferedIndexInput.BUFFER_SIZE)
        {
        }
        
        /// <summary>Opens the reader with an explicit read buffer size; docStoreOffset of -1
        /// means this segment owns its own private term vector files.</summary>
        internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize):this(d, segment, fieldInfos, readBufferSize, - 1, 0)
        {
        }
        
        /// <summary>
        /// Opens the .tvx/.tvd/.tvf streams for <paramref name="segment"/> in
        /// <paramref name="d"/>, validates their format headers and computes the
        /// document count.  When <paramref name="docStoreOffset"/> is -1 the files
        /// are private to this segment; otherwise this reader exposes the
        /// <paramref name="size"/> docs starting at that offset within shared files.
        /// Any stream opened before a failure is closed via Dispose().
        /// </summary>
        /// <throws>CorruptIndexException if a file header has an unsupported format</throws>
        /// <throws>IOException if the files cannot be read</throws>
        internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize, int docStoreOffset, int size)
        {
            bool success = false;
            
            try
            {
                if (d.FileExists(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION))
                {
                    tvx = d.OpenInput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION, readBufferSize);
                    format = CheckValidFormat(tvx);
                    tvd = d.OpenInput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION, readBufferSize);
                    int tvdFormat = CheckValidFormat(tvd);
                    tvf = d.OpenInput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION, readBufferSize);
                    int tvfFormat = CheckValidFormat(tvf);

                    // All three files are written together and must agree on format.
                    System.Diagnostics.Debug.Assert(format == tvdFormat);
                    System.Diagnostics.Debug.Assert(format == tvfFormat);

                    if (format >= FORMAT_VERSION2)
                    {
                        // 16 bytes per doc in tvx (two longs: tvd and tvf pointers).
                        System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 16 == 0);
                        numTotalDocs = (int)(tvx.Length() >> 4);
                    }
                    else
                    {
                        // 8 bytes per doc in tvx (one long: tvd pointer).
                        System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 8 == 0);
                        numTotalDocs = (int)(tvx.Length() >> 3);
                    }

                    if (-1 == docStoreOffset)
                    {
                        this.docStoreOffset = 0;
                        this.size = numTotalDocs;
                        System.Diagnostics.Debug.Assert(size == 0 || numTotalDocs == size);
                    }
                    else
                    {
                        this.docStoreOffset = docStoreOffset;
                        this.size = size;
                        // Verify the file is long enough to hold all of our
                        // docs
                        System.Diagnostics.Debug.Assert(numTotalDocs >= size + docStoreOffset, "numTotalDocs=" + numTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset);
                    }
                }
                else
                {
                    // If all documents flushed in a segment had hit
                    // non-aborting exceptions, it's possible that
                    // FieldInfos.hasVectors returns true yet the term
                    // vector files don't exist.
                    format = 0;
                }

                
                this.fieldInfos = fieldInfos;
                success = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    Dispose();
                }
            }
        }
        
        // Used for bulk copy when merging
        internal virtual IndexInput GetTvdStream()
        {
            return tvd;
        }
        
        // Used for bulk copy when merging
        internal virtual IndexInput GetTvfStream()
        {
            return tvf;
        }
        
        /// <summary>Positions tvx at the index entry for <paramref name="docNum"/>,
        /// accounting for the format header and the doc-store offset.  Entries are
        /// 8 bytes before FORMAT_VERSION2 and 16 bytes from it on.</summary>
        private void  SeekTvx(int docNum)
        {
            if (format < FORMAT_VERSION2)
                tvx.Seek((docNum + docStoreOffset) * 8L + FORMAT_SIZE);
            else
                tvx.Seek((docNum + docStoreOffset) * 16L + FORMAT_SIZE);
        }
        
        /// <summary>True when the on-disk format supports raw (byte-level) doc copying
        /// during merges, i.e. the current UTF-8 format.</summary>
        internal virtual bool CanReadRawDocs()
        {
            return format >= FORMAT_UTF8_LENGTH_IN_BYTES;
        }
        
        /// <summary>Retrieve the length (in bytes) of the tvd and tvf
        /// entries for the next numDocs starting with
        /// startDocID.  This is used for bulk copying when
        /// merging segments, if the field numbers are
        /// congruent.  Once this returns, the tvf &amp; tvd streams
        /// are seeked to the startDocID. 
        /// </summary>
        internal void  RawDocs(int[] tvdLengths, int[] tvfLengths, int startDocID, int numDocs)
        {
            
            if (tvx == null)
            {
                // No term vector files: report zero-length entries for every doc.
                for (int i = 0; i < tvdLengths.Length; i++)
                {
                    tvdLengths[i] = 0;
                }
                for (int i = 0; i < tvfLengths.Length; i++)
                {
                    tvfLengths[i] = 0;
                }
                return ;
            }
            
            // SegmentMerger calls canReadRawDocs() first and should
            // not call us if that returns false.
            if (format < FORMAT_VERSION2)
                throw new System.SystemException("cannot read raw docs with older term vector formats");
            
            SeekTvx(startDocID);
            
            long tvdPosition = tvx.ReadLong();
            tvd.Seek(tvdPosition);
            
            long tvfPosition = tvx.ReadLong();
            tvf.Seek(tvfPosition);
            
            long lastTvdPosition = tvdPosition;
            long lastTvfPosition = tvfPosition;
            
            // Each entry's length is the distance to the next doc's pointer; the
            // final doc's length runs to the end of the tvd/tvf files.
            int count = 0;
            while (count < numDocs)
            {
                int docID = docStoreOffset + startDocID + count + 1;
                System.Diagnostics.Debug.Assert(docID <= numTotalDocs);
                if (docID < numTotalDocs)
                {
                    tvdPosition = tvx.ReadLong();
                    tvfPosition = tvx.ReadLong();
                }
                else
                {
                    tvdPosition = tvd.Length();
                    tvfPosition = tvf.Length();
                    System.Diagnostics.Debug.Assert(count == numDocs - 1);
                }
                tvdLengths[count] = (int) (tvdPosition - lastTvdPosition);
                tvfLengths[count] = (int) (tvfPosition - lastTvfPosition);
                count++;
                lastTvdPosition = tvdPosition;
                lastTvfPosition = tvfPosition;
            }
        }
        
        /// <summary>Reads and returns the format header from <paramref name="in_Renamed"/>.</summary>
        /// <throws>CorruptIndexException if the format is newer than FORMAT_CURRENT</throws>
        private int CheckValidFormat(IndexInput in_Renamed)
        {
            int format = in_Renamed.ReadInt();
            if (format > FORMAT_CURRENT)
            {
                throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FORMAT_CURRENT + " or less");
            }
            return format;
        }
        
        public void Dispose()
        {
            Dispose(true);
        }

        /// <summary>Closes the three underlying streams.  All close attempts are made
        /// even if one fails; the first failure is rethrown afterwards.</summary>
        protected virtual void Dispose(bool disposing)
        {
            if (isDisposed) return;

            if (disposing)
            {
                // make all effort to close up. Keep the first exception
                // and throw it as a new one.
                System.IO.IOException keep = null;
                if (tvx != null)
                    try
                    {
                        tvx.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                            keep = e;
                    }
                if (tvd != null)
                    try
                    {
                        tvd.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                            keep = e;
                    }
                if (tvf != null)
                    try
                    {
                        tvf.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                            keep = e;
                    }
                if (keep != null)
                {
                    // Rethrow wrapped so callers still catch an IOException while the
                    // original exception (message + stack trace) is preserved as the
                    // inner exception instead of being discarded.
                    throw new System.IO.IOException(keep.Message, keep);
                }
            }

            isDisposed = true;
        }
        
        /// <summary> </summary>
        /// <returns> The number of documents in the reader
        /// </returns>
        internal virtual int Size()
        {
            return size;
        }
        
        /// <summary>Feeds the term vector of <paramref name="field"/> in document
        /// <paramref name="docNum"/> into <paramref name="mapper"/>.  Does nothing if
        /// the segment has no vectors or the field is not vectorized in this doc.</summary>
        public virtual void  Get(int docNum, System.String field, TermVectorMapper mapper)
        {
            if (tvx != null)
            {
                int fieldNumber = fieldInfos.FieldNumber(field);
                //We need to account for the FORMAT_SIZE at when seeking in the tvx
                //We don't need to do this in other seeks because we already have the
                // file pointer
                //that was written in another file
                SeekTvx(docNum);
                //System.out.println("TVX Pointer: " + tvx.getFilePointer());
                long tvdPosition = tvx.ReadLong();
                
                tvd.Seek(tvdPosition);
                int fieldCount = tvd.ReadVInt();
                //System.out.println("Num Fields: " + fieldCount);
                // There are only a few fields per document. We opt for a full scan
                // rather then requiring that they be ordered. We need to read through
                // all of the fields anyway to get to the tvf pointers.
                int number = 0;
                int found = - 1;
                for (int i = 0; i < fieldCount; i++)
                {
                    // Old formats store field numbers as deltas; newer ones store
                    // them directly.
                    if (format >= FORMAT_VERSION)
                        number = tvd.ReadVInt();
                    else
                        number += tvd.ReadVInt();
                    
                    if (number == fieldNumber)
                        found = i;
                }
                
                // This field, although valid in the segment, was not found in this
                // document
                if (found != - 1)
                {
                    // Compute position in the tvf file
                    long position;
                    if (format >= FORMAT_VERSION2)
                        position = tvx.ReadLong();
                    else
                        position = tvd.ReadVLong();
                    // tvd stores delta-encoded tvf pointers; sum up to the found field.
                    for (int i = 1; i <= found; i++)
                        position += tvd.ReadVLong();

                    mapper.SetDocumentNumber(docNum);
                    ReadTermVector(field, position, mapper);
                }
                else
                {
                    //System.out.println("Fieldable not found");
                }
            }
            else
            {
                //System.out.println("No tvx file");
            }
        }
        
        
        
        /// <summary> Retrieve the term vector for the given document and field</summary>
        /// <param name="docNum">The document number to retrieve the vector for
        /// </param>
        /// <param name="field">The field within the document to retrieve
        /// </param>
        /// <returns> The TermFreqVector for the document and field or null if there is no termVector for this field.
        /// </returns>
        /// <throws>  IOException if there is an error reading the term vector files </throws>
        public /*internal*/ virtual ITermFreqVector Get(int docNum, System.String field)
        {
            // Check if no term vectors are available for this segment at all
            ParallelArrayTermVectorMapper mapper = new ParallelArrayTermVectorMapper();
            Get(docNum, field, mapper);
            
            return mapper.MaterializeVector();
        }
        
        // Reads the String[] fields; you have to pre-seek tvd to
        // the right point
        private System.String[] ReadFields(int fieldCount)
        {
            int number = 0;
            System.String[] fields = new System.String[fieldCount];
            
            for (int i = 0; i < fieldCount; i++)
            {
                // Old formats store field numbers as deltas; newer ones store
                // them directly.
                if (format >= FORMAT_VERSION)
                    number = tvd.ReadVInt();
                else
                    number += tvd.ReadVInt();
                
                fields[i] = fieldInfos.FieldName(number);
            }
            
            return fields;
        }
        
        // Reads the long[] offsets into TVF; you have to pre-seek
        // tvx/tvd to the right point
        private long[] ReadTvfPointers(int fieldCount)
        {
            // Compute position in the tvf file
            long position;
            if (format >= FORMAT_VERSION2)
                position = tvx.ReadLong();
            else
                position = tvd.ReadVLong();
            
            long[] tvfPointers = new long[fieldCount];
            tvfPointers[0] = position;
            
            // Remaining pointers are delta-encoded VLongs in tvd.
            for (int i = 1; i < fieldCount; i++)
            {
                position += tvd.ReadVLong();
                tvfPointers[i] = position;
            }
            
            return tvfPointers;
        }
        
        /// <summary> Return all term vectors stored for this document or null if the could not be read in.
        /// 
        /// </summary>
        /// <param name="docNum">The document number to retrieve the vector for
        /// </param>
        /// <returns> All term frequency vectors
        /// </returns>
        /// <throws>  IOException if there is an error reading the term vector files  </throws>
        public /*internal*/ virtual ITermFreqVector[] Get(int docNum)
        {
            ITermFreqVector[] result = null;
            if (tvx != null)
            {
                //We need to offset by
                SeekTvx(docNum);
                long tvdPosition = tvx.ReadLong();
                
                tvd.Seek(tvdPosition);
                int fieldCount = tvd.ReadVInt();
                
                // No fields are vectorized for this document
                if (fieldCount != 0)
                {
                    System.String[] fields = ReadFields(fieldCount);
                    long[] tvfPointers = ReadTvfPointers(fieldCount);
                    result = ReadTermVectors(docNum, fields, tvfPointers);
                }
            }
            else
            {
                //System.out.println("No tvx file");
            }
            return result;
        }
        
        /// <summary>Feeds every term vector of document <paramref name="docNumber"/>
        /// into <paramref name="mapper"/>.  Does nothing if the segment has no term
        /// vectors or the document has no vectorized fields.</summary>
        public virtual void  Get(int docNumber, TermVectorMapper mapper)
        {
            // Check if no term vectors are available for this segment at all
            if (tvx != null)
            {
                //We need to offset by
                
                SeekTvx(docNumber);
                long tvdPosition = tvx.ReadLong();
                
                tvd.Seek(tvdPosition);
                int fieldCount = tvd.ReadVInt();
                
                // No fields are vectorized for this document
                if (fieldCount != 0)
                {
                    System.String[] fields = ReadFields(fieldCount);
                    long[] tvfPointers = ReadTvfPointers(fieldCount);
                    mapper.SetDocumentNumber(docNumber);
                    ReadTermVectors(fields, tvfPointers, mapper);
                }
            }
            else
            {
                //System.out.println("No tvx file");
            }
        }
        
        
        /// <summary>Materializes one SegmentTermVector per field, reading each from
        /// its tvf pointer.</summary>
        private SegmentTermVector[] ReadTermVectors(int docNum, System.String[] fields, long[] tvfPointers)
        {
            SegmentTermVector[] res = new SegmentTermVector[fields.Length];
            for (int i = 0; i < fields.Length; i++)
            {
                var mapper = new ParallelArrayTermVectorMapper();
                mapper.SetDocumentNumber(docNum);
                ReadTermVector(fields[i], tvfPointers[i], mapper);
                res[i] = (SegmentTermVector) mapper.MaterializeVector();
            }
            return res;
        }
        
        /// <summary>Streams every field's term vector into the caller-supplied mapper.</summary>
        private void  ReadTermVectors(System.String[] fields, long[] tvfPointers, TermVectorMapper mapper)
        {
            for (int i = 0; i < fields.Length; i++)
            {
                ReadTermVector(fields[i], tvfPointers[i], mapper);
            }
        }
        
        
        /// <summary> </summary>
        /// <param name="field">The field to read in
        /// </param>
        /// <param name="tvfPointer">The pointer within the tvf file where we should start reading
        /// </param>
        /// <param name="mapper">The mapper used to map the TermVector
        /// </param>
        /// <throws>  IOException </throws>
        private void  ReadTermVector(System.String field, long tvfPointer, TermVectorMapper mapper)
        {
            
            // Now read the data from specified position
            //We don't need to offset by the FORMAT here since the pointer already includes the offset
            tvf.Seek(tvfPointer);
            
            int numTerms = tvf.ReadVInt();
            //System.out.println("Num Terms: " + numTerms);
            // If no terms - return a constant empty termvector. However, this should never occur!
            if (numTerms == 0)
                return ;
            
            bool storePositions;
            bool storeOffsets;
            
            if (format >= FORMAT_VERSION)
            {
                byte bits = tvf.ReadByte();
                storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
                storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
            }
            else
            {
                // Pre-FORMAT_VERSION files store neither positions nor offsets;
                // skip the unused VInt that occupies the flags slot.
                tvf.ReadVInt();
                storePositions = false;
                storeOffsets = false;
            }
            mapper.SetExpectations(field, numTerms, storeOffsets, storePositions);
            int start = 0;
            int deltaLength = 0;
            int totalLength = 0;
            byte[] byteBuffer;
            char[] charBuffer;
            bool preUTF8 = format < FORMAT_UTF8_LENGTH_IN_BYTES;
            
            // init the buffers
            if (preUTF8)
            {
                charBuffer = new char[10];
                byteBuffer = null;
            }
            else
            {
                charBuffer = null;
                byteBuffer = new byte[20];
            }
            
            for (int i = 0; i < numTerms; i++)
            {
                // Terms are front-coded: 'start' chars/bytes shared with the
                // previous term, then 'deltaLength' new chars/bytes.
                start = tvf.ReadVInt();
                deltaLength = tvf.ReadVInt();
                totalLength = start + deltaLength;
                
                System.String term;
                
                if (preUTF8)
                {
                    // Term stored as java chars
                    if (charBuffer.Length < totalLength)
                    {
                        char[] newCharBuffer = new char[(int) (1.5 * totalLength)];
                        Array.Copy(charBuffer, 0, newCharBuffer, 0, start);
                        charBuffer = newCharBuffer;
                    }
                    tvf.ReadChars(charBuffer, start, deltaLength);
                    term = new System.String(charBuffer, 0, totalLength);
                }
                else
                {
                    // Term stored as utf8 bytes
                    if (byteBuffer.Length < totalLength)
                    {
                        byte[] newByteBuffer = new byte[(int) (1.5 * totalLength)];
                        Array.Copy(byteBuffer, 0, newByteBuffer, 0, start);
                        byteBuffer = newByteBuffer;
                    }
                    tvf.ReadBytes(byteBuffer, start, deltaLength);
                    term = System.Text.Encoding.UTF8.GetString(byteBuffer, 0, totalLength);
                }
                int freq = tvf.ReadVInt();
                int[] positions = null;
                if (storePositions)
                {
                    //read in the positions
                    //does the mapper even care about positions?
                    if (mapper.IsIgnoringPositions == false)
                    {
                        positions = new int[freq];
                        int prevPosition = 0;
                        for (int j = 0; j < freq; j++)
                        {
                            // Positions are delta-encoded VInts.
                            positions[j] = prevPosition + tvf.ReadVInt();
                            prevPosition = positions[j];
                        }
                    }
                    else
                    {
                        //we need to skip over the positions.  Since these are VInts, I don't believe there is anyway to know for sure how far to skip
                        //
                        for (int j = 0; j < freq; j++)
                        {
                            tvf.ReadVInt();
                        }
                    }
                }
                TermVectorOffsetInfo[] offsets = null;
                if (storeOffsets)
                {
                    //does the mapper even care about offsets?
                    if (mapper.IsIgnoringOffsets == false)
                    {
                        offsets = new TermVectorOffsetInfo[freq];
                        int prevOffset = 0;
                        for (int j = 0; j < freq; j++)
                        {
                            // Start offsets are deltas from the previous end offset;
                            // end offsets are deltas from their own start offset.
                            int startOffset = prevOffset + tvf.ReadVInt();
                            int endOffset = startOffset + tvf.ReadVInt();
                            offsets[j] = new TermVectorOffsetInfo(startOffset, endOffset);
                            prevOffset = endOffset;
                        }
                    }
                    else
                    {
                        for (int j = 0; j < freq; j++)
                        {
                            tvf.ReadVInt();
                            tvf.ReadVInt();
                        }
                    }
                }
                mapper.Map(term, freq, offsets, positions);
            }
        }
        
        /// <summary>Returns a shallow clone whose three streams are independent clones,
        /// so the copy can be positioned without disturbing this reader.</summary>
        public virtual System.Object Clone()
        {
            
            TermVectorsReader clone = (TermVectorsReader) base.MemberwiseClone();
            
            // These are null when a TermVectorsReader was created
            // on a segment that did not have term vectors saved
            if (tvx != null && tvd != null && tvf != null)
            {
                clone.tvx = (IndexInput) tvx.Clone();
                clone.tvd = (IndexInput) tvd.Clone();
                clone.tvf = (IndexInput) tvf.Clone();
            }
            
            return clone;
        }
    }
    
    
    /// <summary> Models the existing parallel array structure: element i of each
    /// array describes the i-th term mapped into this instance.</summary>
    class ParallelArrayTermVectorMapper:TermVectorMapper
    {
        
        private System.String[] terms;
        private int[] termFreqs;
        private int[][] positions;
        private TermVectorOffsetInfo[][] offsets;
        private int currentPosition;     // next free slot in the parallel arrays
        private bool storingOffsets;     // true when offsets are collected
        private bool storingPositions;   // true when positions are collected
        private System.String field;     // field whose vector is being built
        
        /// <summary>Records the field/flags and allocates the parallel arrays for
        /// the announced number of terms.</summary>
        public override void  SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions)
        {
            this.field = field;
            this.storingOffsets = storeOffsets;
            this.storingPositions = storePositions;
            terms = new System.String[numTerms];
            termFreqs = new int[numTerms];
            if (storePositions)
            {
                this.positions = new int[numTerms][];
            }
            if (storeOffsets)
            {
                this.offsets = new TermVectorOffsetInfo[numTerms][];
            }
        }
        
        /// <summary>Stores one term and its statistics into the next free slot.</summary>
        public override void  Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
        {
            int slot = currentPosition++;
            terms[slot] = term;
            termFreqs[slot] = frequency;
            if (storingOffsets)
            {
                this.offsets[slot] = offsets;
            }
            if (storingPositions)
            {
                this.positions[slot] = positions;
            }
        }
        
        /// <summary> Construct the vector</summary>
        /// <returns> The <see cref="ITermFreqVector" /> based on the mappings,
        /// or null when no expectations were ever set.
        /// </returns>
        public virtual ITermFreqVector MaterializeVector()
        {
            if (field == null || terms == null)
            {
                return null;
            }
            // Positions/offsets require the richer vector type.
            return (storingPositions || storingOffsets)
                ? (ITermFreqVector) new SegmentTermPositionVector(field, terms, termFreqs, positions, offsets)
                : new SegmentTermVector(field, terms, termFreqs);
        }
    }
}